5 Metrics for 1hr Bins
5.2 Baccalieu - Ned Walsh 2017-2018
Let’s use 1hr bins this time
## [1] "C:/Users/BoschJ/Desktop/wet-dry_analysis/data/downsampled_1hrs"
Start with the 2017-2018 files from Baccalieu (6 files total)
metrics_md <- read.csv(file.path(data.dir, "metrics_md.csv"))
#get list of downsampled files to process
bacc_2017_md <- metrics_md %>%
filter(colony == "Baccalieu - Ned Walsh",
deployment_period == "2017-2018") %>%
pull(wetdry_filename)
bacc_2017_md <- paste0("filtered_", bacc_2017_md)
DEG_files <- list.files(downsampled.dir, full.names=TRUE)
bacc_2017_files <- DEG_files[basename(DEG_files) %in% bacc_2017_md]Pick a colony and time range to look at
5.2.1 Proportion/Switches
Calculated by: * Binned Periods - Finest resolution, * Daily Periods - Helps assess trends over long time periods, like looking at wet proportions/switches seasonally * Monthly Periods - Lowest resolution ()
5.2.1.0.1 1hr Periods
Calculate Proportions/Switches within each 1hr Bin
calc_metrics <- function(file_path) {
deg_data <- read_csv(file_path, show_col_types=FALSE)
bird_id <- str_extract(basename(file_path), "(?<=filtered_)[A-Z-0-9]{5}")
#start by calculating metrics for a single 1hr bin
metrics <- deg_data %>%
mutate(
bin_length = bin_time * 3600, # in secs
wets = ifelse(is.na(wets), 0, wets), #replace NAs with wet=0
#proportion of time wet in 1hr period (wets*30 converts wetness level into secs spent wet)
prop_wet = ifelse(bin_length >0, (wets * 30) / bin_length),
#switches between wet (non-zero) and dry (zero) states
switches = ifelse(row_number() == 1, 0, abs(diff(ifelse(wets > 0, 1, 0)))),
#cumulative time spent dry (== 0) or wet (>0)
dry_time= ifelse(wets == 0, 10*60, 0), # if dry, time always 600 secs
wet_time = ifelse(wets > 0, 10*60, 0), # if wet, time always 600 secs
bird_id = bird_id)
return(metrics)
}
bin_metrics <- future_map_dfr(chosen_files, calc_metrics)
head(bin_metrics)## # A tibble: 6 × 10
## date start_time end_time wets bin_length prop_wet switches dry_time
## <date> <time> <time> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2017-09-20 00:00 00:59:59 0 3600 0 0 600
## 2 2017-09-20 01:00 01:59:59 0 3600 0 0 600
## 3 2017-09-20 02:00 02:59:59 0 3600 0 0 600
## 4 2017-09-20 03:00 03:59:59 0 3600 0 0 600
## 5 2017-09-20 04:00 04:59:59 0 3600 0 1 600
## 6 2017-09-20 05:00 05:59:59 5.33 3600 0.0444 0 0
## # ℹ 2 more variables: wet_time <dbl>, bird_id <chr>
Plot proportions on a heatmap per device
plotly_heatmap_prop <- plot_ly(
data = bin_metrics,
x = ~date, y = ~bird_id, z = ~prop_wet,
type = "heatmap",
colors =c("white", "blue"),
colorbar = list(title= "Proportion Wet")
) %>%
layout(
title = paste("Heatmap (bin =", bin_time, "hour)"),
xaxis=list(title = "Date", tickformat = "%b %Y", tickangle = 45, tickmode = "linear", dtick="M1"),
yaxis = list(title = "Bird ID"),
margin = list(l = 100, b = 100, t = 50)
)
plotly_heatmap_propProportion scales for 1hr and 2hr bins are quite different.
For 1-hour bins, bin_length = 3600 seconds. For 2-hour bins, bin_length = 7200 seconds Wetness intervals are averaged over a larger time when bins = 2hrs, so when you aggregate over a longer period it’s les sliekly for the bin to contain a higher wetness level.
5.2.1.0.2 Daily Periods
Aggregate bins to DAYS for visualising daily proportions, switches and wet/dry times
calc_daily_metrics <- function(file_path) {
deg_data <- read_csv(file_path, show_col_types=FALSE)
bird_id <- str_extract(basename(file_path), "(?<=filtered_)[A-Z-0-9]{5}")
#start by calculating metrics for a single bin
metrics <- deg_data %>%
mutate(
bin_length = bin_time * 3600, #in secs
wets = ifelse(is.na(wets), 0, wets), #replace NAs with wets
#proportion of time wet in 1hr period (wets*30 converts wetness level into secs spent wet)
# (600sec per 10min interval) / 20 = 30 seconds per unit
prop_wet = ifelse(bin_length >0, (wets * 30) / bin_length),
#switches between wet (non-zero) and dry (zero) states
switches= ifelse(row_number() == 1,0,
abs(diff(ifelse(wets > 0, 1, 0)))),
#cumulative time spent dry (== 0) or wet (>0)
dry_time= ifelse(wets == 0, 10*60, 0), # if dry, time always 600 secs
wet_time = ifelse(wets > 0, 10*60, 0) # if wet, time always 600 secs
) %>%
#summarize the metrics for the full deployment period, grouped by day
group_by(date) %>%
summarize(
bird_id = bird_id,
total_prop_wet = sum(prop_wet, na.rm = TRUE), #take sum of proportions for daily
total_switches = sum(switches, na.rm=TRUE),
total_dry_time = sum(dry_time, na.rm=TRUE),
total_wet_time = sum(wet_time, na.rm=TRUE),
.groups="drop")
return(metrics)
}
daily_metrics <- future_map_dfr(chosen_files, calc_daily_metrics)
daily_metrics## # A tibble: 1,706 × 6
## date bird_id total_prop_wet total_switches total_dry_time
## <date> <chr> <dbl> <dbl> <dbl>
## 1 2017-09-20 BH584 0.604 1 3000
## 2 2017-09-21 BH584 0.879 2 600
## 3 2017-09-22 BH584 0.351 10 3600
## 4 2017-09-23 BH584 0.521 10 3000
## 5 2017-09-24 BH584 0.296 12 5400
## 6 2017-09-25 BH584 0.218 16 7200
## 7 2017-09-26 BH584 1.18 6 1800
## 8 2017-09-27 BH584 0.628 12 4200
## 9 2017-09-28 BH584 1.01 10 4800
## 10 2017-09-29 BH584 0.815 7 3600
## # ℹ 1,696 more rows
## # ℹ 1 more variable: total_wet_time <dbl>
Let’s use a lower resolution, down-sampling by 1hr periods instead
#functions to plot proportions and switches
plot_daily_metrics <- function(data, y_var, y_label, plot_title) {
ggplot(data, aes(x = as.Date(date), y = !!sym(y_var), color = bird_id)) +
geom_line(size = 0.2) +
geom_point(size = 1) +
labs(
title = plot_title,
x = "Time (days)",
y = y_label,
color = "Bird ID"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
title = element_text(size = 8),
legend.position = "right",
legend.direction = "vertical",
legend.box = "vertical",
legend.spacing.y = unit(0.2, "cm")
) +
scale_x_date(
breaks = "1 month",
labels = scales::date_format("%b %Y")
)
}
# Plotly plots
plotly_daily_prop <- ggplotly(
plot_daily_metrics(
daily_metrics,
y_var = "total_prop_wet",
y_label = "Proportion of Wet Periods (day)",
plot_title = "Daily Proportion of Wet Periods"
)
)
plotly_daily_switch <- ggplotly(
plot_daily_metrics(
daily_metrics,
y_var = "total_switches",
y_label = "Number of Wet-Dry Switches",
plot_title = "Number of Switches Between Wet-Dry Periods Per Day"
)
)
#Use patchwork to plot together
subplot(plotly_daily_prop, plotly_daily_switch,
nrows = 2, shareX = TRUE, shareY = FALSE)Number of switches mor ein detail here for 1hr bins
5.2.2 Longest Wet/Dry Periods
calc_longest_prds <- function(file_path) {
deg_data <- read_csv(file_path, show_col_types = FALSE)
#add a datetime_start column
deg_data <- deg_data %>%
mutate(datetime_start = as.POSIXct(paste(date, start_time), format = "%Y-%m-%d %H:%M:%S"),
datetime_end = as.POSIXct(paste(date, end_time), format = "%Y-%m-%d %H:%M:%S"))
# define bird IDs
bird_id <- str_extract(basename(file_path), "(?<=filtered_)[A-Z-0-9]{5}")
# ================================
# Calculate longest periods
# ================================
calc_prds <- function(condition) {
rle_data <- rle(condition)
if (!any(rle_data$values)) {return(list(start = NA, end = NA, duration = 0))}
longest_run <- max(rle_data$lengths[rle_data$values]) #longest run of wets>0 from lengths of vals
end_x <- cumsum(rle_data$lengths)[ #cumul. sum of bin lengths for run end index
which.max(rle_data$lengths[rle_data$values]) ]#index = longest period
start_x <- end_x - longest_run +1 #just subtract # of bins in wet prd
#find when the end and start of longest TRUE prd in the datasets
list(
start = deg_data$datetime_start[start_x],
end = deg_data$datetime_start[end_x],
duration = longest_run * 1) #duration is the longest period we found times the bin time
}
#calculate the periods
longest_wet <- calc_prds(deg_data$wets>0)
longest_wetter <- calc_prds(deg_data$wets>5)
longest_wettest <- calc_prds(deg_data$wets==20)
longest_dry <- calc_prds(deg_data$wets==0)
#return dataframe
return(data.frame(
bird_id = bird_id,
longest_wet = longest_wet$duration,
datetime_start_wet = longest_wet$start,
datetime_end_wet = longest_wet$end,
longest_wetter = longest_wetter$duration,
datetime_start_wetter = longest_wetter$start,
datetime_end_wetter = longest_wetter$end,
longest_wettest = longest_wettest$duration,
datetime_start_maxwet = longest_wettest$start,
datetime_end_maxwet = longest_wettest$end,
longest_dry = longest_dry$duration,
datetime_start_dry = longest_dry$start,
datetime_end_dry = longest_dry$end
))
}
# run on gull 2017-2018 files
longest_prds <- future_map_dfr(chosen_files, calc_longest_prds)
longest_prds## bird_id longest_wet datetime_start_wet datetime_end_wet longest_wetter
## 1 BH584 382 2017-11-06 07:00:00 2017-11-22 04:00:00 20
## 2 BH594 211 2017-11-04 07:00:00 2017-11-13 01:00:00 20
## 3 BH595 169 2017-10-20 03:00:00 2017-10-27 03:00:00 55
## 4 BH596 139 2017-11-11 08:00:00 2017-11-17 02:00:00 18
## 5 BH602 357 2017-09-29 14:00:00 2017-10-14 10:00:00 20
## 6 BH603 215 2017-10-03 21:00:00 2017-10-12 19:00:00 64
## datetime_start_wetter datetime_end_wetter longest_wettest
## 1 2018-02-06 22:00:00 2018-02-07 17:00:00 4
## 2 2017-12-18 09:00:00 2017-12-19 04:00:00 8
## 3 2018-01-12 20:00:00 2018-01-15 02:00:00 9
## 4 2017-12-04 12:00:00 2017-12-05 05:00:00 5
## 5 2017-11-01 11:00:00 2017-11-02 06:00:00 6
## 6 2018-01-09 07:00:00 2018-01-11 22:00:00 6
## datetime_start_maxwet datetime_end_maxwet longest_dry datetime_start_dry
## 1 2017-11-08 12:00:00 2017-11-08 15:00:00 75 2017-12-19 23:00:00
## 2 2018-02-09 06:00:00 2018-02-09 13:00:00 94 2017-12-14 02:00:00
## 3 2017-12-19 03:00:00 2017-12-19 11:00:00 96 2017-12-25 23:00:00
## 4 2017-12-17 08:00:00 2017-12-17 12:00:00 75 2017-12-03 03:00:00
## 5 2018-01-07 05:00:00 2018-01-07 10:00:00 101 2017-11-09 19:00:00
## 6 2017-11-23 12:00:00 2017-11-23 17:00:00 125 2017-12-03 20:00:00
## datetime_end_dry
## 1 2017-12-23 01:00:00
## 2 2017-12-17 23:00:00
## 3 2017-12-29 22:00:00
## 4 2017-12-06 05:00:00
## 5 2017-11-13 23:00:00
## 6 2017-12-09 00:00:00
NOTE: longest_wet is a value of how many bins. So if longest_wet = 764, that means that the bird’s device recorded wetness for at least part of each 1-hour period over a continuous span of 764 hrs. It does not imply that the bird was continuously wet throughout the entire 1-hour bins during this period since there may have been fluctuations between wet and dry states within each bin.
dep_date <- as.POSIXct("2017-09-20", format = "%Y-%m-%d")
ret_date <- as.POSIXct("2018-06-29", format = "%Y-%m-%d")
plot_longest_prds <- ggplot(longest_prds, aes(y = bird_id)) +
# Wet at least once in 1-hour bin (dotted black line)
geom_segment(aes(x = datetime_start_wet,
xend = datetime_end_wet, yend = bird_id, color = "Wet Period"),
size = 1, linetype = "dotted") +
# Longest when wet > 5
geom_segment(aes(x = datetime_start_wetter,
xend = datetime_end_wetter, yend = bird_id, color = "Wetter > 5"),
size = 1) +
# Longest entirely wet period (solid blue line)
geom_segment(aes(x = datetime_start_maxwet,
xend = datetime_end_maxwet, yend = bird_id, color = "Max Wet Period"),
size = 1) +
# Longest dry period (dashed blue line)
geom_segment(aes(x =datetime_start_dry,
xend = datetime_end_dry, yend = bird_id, color = "Dry Period"),
size = 1, linetype = "dashed") +
# Labels and formatting
labs(title = "Longest Wet Periods",
x = "Time", y = "Bird ID", color = "Period Type") +
# Customize x-axis scale
scale_x_datetime(breaks = date_breaks("1 month"), labels = date_format("%b %Y"),
limits = c(dep_date, ret_date)) +
# Define color palette for the legend
scale_color_manual(values = c(
"Wet Period" = "lightblue",
"Wetter > 5" = "blue1",
"Max Wet Period" = "red",
"Dry Period" = "white")) +
# Theme adjustments
theme_minimal() +
theme(
axis.text.y = element_text(size = 10),
plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
legend.position = "bottom" # Adjust legend position
)
# Convert to interactive plotly plot
plotly_longest_prds <- ggplotly(plot_longest_prds) %>%
layout(margin = list(l = 50, r = 50, b = 100, t = 50),
xaxis = list(tickangle = 45))
plotly_longest_prdsAs a consequence, the light blue line here showing the longest period when wet was greater than 0 looks like it spans across the whole time. There were definitely periods within those 1 hour bins when wets = 0 for longer though.
5.2.3 Wet-Dry Full Deployment
Lastly, let’s look at all of the wet and dry periods throughout the entire deployment time so we can visualise how they change over time and maybe see when those really long periods of entirely wet are
prepare_plot_data <- function(file_path) {
deg_data <- read_csv(file_path, show_col_types = FALSE) %>%
mutate(
datetime_start = as.POSIXct(paste(date, start_time), format = "%Y-%m-%d %H:%M:%S"),
bird_id = str_extract(basename(file_path), "(?<=filtered_)[A-Z-0-9]{5}"),
time_window = as.POSIXct(paste(date, start_time), format = "%Y-%m-%d %H:%M:%S")
) %>%
group_by(bird_id, time_window) %>%
summarize(avg_wets = mean(wets, na.rm = TRUE), .groups = "drop")
return(deg_data)
}
plot_data <- chosen_files %>%
lapply(prepare_plot_data) %>%
bind_rows()
# data for all wets during deployment periods
plot_all_wet_periods <- ggplot(plot_data, aes(x = time_window, y = avg_wets, color = bird_id)) +
geom_line() +
labs(
title = "Dry and Wet Periods for Baccalieu Island - Ned Walsh (2017-2018)",
x = "Time (1hr intervals)",
y = "Wetness",
color = "Bird ID"
) +
scale_y_continuous(
breaks = seq(0, 20, 5)
) +
scale_x_datetime(
breaks = "1 month",
labels = date_format("%b %Y"),
limits = c(dep_date, ret_date) # Ensure this works with POSIXct limits
) +
theme_minimal() +
theme(
axis.text.y = element_text(size = 10),
plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5)
)
# Convert to interactive plotly plot
plotly_bacc_2017_all_wet <- ggplotly(plot_all_wet_periods) %>%
layout(margin = list(l = 50, r = 50, b = 100, t = 50),
xaxis = list(tickangle = 45))
plotly_bacc_2017_all_wet5.3 All 2021 files
## [1] "C:/Users/BoschJ/Desktop/wet-dry_analysis/data/downsampled_1hrs"
bin_time = 1
#get list of downsampled files to process
md_2021 <- metrics_md %>%
filter(deployment_period == "2021-2022") %>%
pull(wetdry_filename)
md_2021 <- paste0("filtered_", md_2021)
DEG_files <- list.files(downsampled.dir, full.names=TRUE)
files_2021 <- DEG_files[basename(DEG_files) %in% md_2021]
chosen_files <- files_20215.3.1 Proportion/Switches
Calculated by: * Binned Periods - Finest resolution, * Daily Periods - Helps assess trends over long time periods, like looking at wet proportions/switches seasonally * Monthly Periods - Lowest resolution ()
5.3.1.0.1 1hr Periods
Calculate Proportions/Switches within each 1hr Bin
calc_metrics <- function(file_path) {
deg_data <- read_csv(file_path, show_col_types=FALSE)
bird_id <- str_extract(basename(file_path), "(?<=filtered_)[A-Z-0-9]{5}")
#start by calculating metrics for a single 1hr bin
metrics <- deg_data %>%
mutate(
bin_length = bin_time * 3600, # in secs
wets = ifelse(is.na(wets), 0, wets), #replace NAs with wet=0
#proportion of time wet in 1hr period (wets*30 converts wetness level into secs spent wet)
prop_wet = ifelse(bin_length >0, (wets * 30) / bin_length),
#switches between wet (non-zero) and dry (zero) states
switches = ifelse(row_number() == 1, 0, abs(diff(ifelse(wets > 0, 1, 0)))),
#cumulative time spent dry (== 0) or wet (>0)
dry_time= ifelse(wets == 0, 10*60, 0), # if dry, time always 600 secs
wet_time = ifelse(wets > 0, 10*60, 0), # if wet, time always 600 secs
bird_id = bird_id)
return(metrics)
}
bin_metrics <- future_map_dfr(chosen_files, calc_metrics, .progress = TRUE)
head(bin_metrics)## # A tibble: 6 × 10
## date start_time end_time wets bin_length prop_wet switches dry_time
## <date> <time> <time> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2021-08-19 00:00 00:59:59 0 3600 0 0 600
## 2 2021-08-19 01:00 01:59:59 0 3600 0 0 600
## 3 2021-08-19 02:00 02:59:59 0 3600 0 1 600
## 4 2021-08-19 03:00 03:59:59 1 3600 0.00833 0 0
## 5 2021-08-19 04:00 04:59:59 5.33 3600 0.0444 0 0
## 6 2021-08-19 05:00 05:59:59 1 3600 0.00833 0 0
## # ℹ 2 more variables: wet_time <dbl>, bird_id <chr>
Plot proportions on a heatmap per device
plotly_heatmap_prop <- plot_ly(
data = bin_metrics,
x = ~date, y = ~bird_id, z = ~prop_wet,
type = "heatmap",
colors =c("white", "blue"),
colorbar = list(title= "Proportion Wet")
) %>%
layout(
title = paste("Heatmap (bin =", bin_time, "hour)"),
xaxis=list(title = "Date", tickformat = "%b %Y", tickangle = 45, tickmode = "linear", dtick="M1"),
yaxis = list(title = "Bird ID"),
margin = list(l = 100, b = 100, t = 50)
)
plotly_heatmap_prop